# Global knitr defaults applied to every chunk in this report.
knitr::opts_chunk$set(
  # Figure geometry: wide figures, centred, scaled to 90% of the text width
  fig.width = 12,
  fig.asp = 0.6,
  fig.align = "center",
  out.width = "90%",
  # Chunk display: show code and results, hide messages and warnings
  echo = TRUE,
  include = TRUE,
  message = FALSE,
  warning = FALSE
)
library(tidyverse)
library(dplyr)
library(arsenal)
library(HH)
library(leaps)
library(corrplot)
library(faraway)
library(ggpubr)
library(broom)
library(ggplot2)
library(MASS)
library(patchwork)
library(caret)
# Load the hate crime data. The compact column spec reads the first three
# columns as factors and the remaining six as doubles. Column names are then
# standardised and every row containing a missing value is discarded.
hate_crime <- drop_na(
  janitor::clean_names(
    read_csv("data/HateCrimes.csv", col_types = "fffdddddd")
  )
)
| Overall (N=45) | |
|---|---|
| Level of unemployment | |
| - high | 23 (51.1%) |
| - low | 22 (48.9%) |
| Level of state urbanization | |
| - low | 21 (46.7%) |
| - high | 24 (53.3%) |
| Median Household Income | |
| - Mean (SD) | 55299.49 (8979.49) |
| - Median (Q1, Q3) | 54916.00 (48060.00, 60708.00) |
| - Min - Max | 39552.00 - 76165.00 |
| Percent of adults with a high school degree | |
| - Mean (SD) | 0.87 (0.03) |
| - Median (Q1, Q3) | 0.87 (0.84, 0.89) |
| - Min - Max | 0.80 - 0.92 |
| Percent of population that are not US citizens | |
| - Mean (SD) | 0.06 (0.03) |
| - Median (Q1, Q3) | 0.05 (0.03, 0.08) |
| - Min - Max | 0.01 - 0.13 |
| Income inequality index | |
| - Mean (SD) | 0.46 (0.02) |
| - Median (Q1, Q3) | 0.46 (0.44, 0.47) |
| - Min - Max | 0.42 - 0.53 |
| Percent of population that are non-white | |
| - Mean (SD) | 0.32 (0.15) |
| - Median (Q1, Q3) | 0.30 (0.21, 0.42) |
| - Min - Max | 0.06 - 0.63 |
| Hate crime rate per 100,000 population | |
| - Mean (SD) | 0.30 (0.25) |
| - Median (Q1, Q3) | 0.23 (0.14, 0.35) |
| - Min - Max | 0.07 - 1.52 |
From the histogram below, we observe our outcome distribution has right skewness, suggesting that we may need to check our normality assumption. Our QQ Plot also indicates severe departures from normality.
# Histogram of the untransformed outcome (hate crimes per 100,000 population)
hate_crime %>%
  ggplot(aes(x = hate_crimes_per_100k_splc)) +
  # bins = 30 is what geom_histogram() falls back to when nothing is given;
  # stating it makes the binning an explicit choice
  geom_histogram(bins = 30, color = "red", fill = "black") +
  labs(
    title = "Distribution of Hate Crime Rates in 50 US States",
    x = "Hate Crime Rate per 100,000 Population",
    y = "Frequency of Distribution",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# Normal QQ plot of the outcome, drawn with base graphics.
# The outcome is kept as a standalone vector because the Shapiro-Wilk tests
# later in the report read it under this name.
hate_crimes_per_100k_splc <- hate_crime$hate_crimes_per_100k_splc
qqnorm(hate_crimes_per_100k_splc, col = 2, pch = 19, cex = 1.5)
# qqline() is called only for its side effect of adding the reference line to
# the current plot, so its return value is not stored.
qqline(hate_crimes_per_100k_splc, col = 1, lwd = 2, lty = 2)
After performing a Shapiro-Wilk test to check the normality assumption of our outcome distribution, we find evidence to suggest that our data deviates from normality.
# Shapiro-Wilk normality test on the untransformed outcome, tidied into a
# one-row data frame and rendered as a simple table
knitr::kable(
  broom::tidy(shapiro.test(hate_crimes_per_100k_splc)),
  "simple"
)
| statistic | p.value | method |
|---|---|---|
| 0.7107896 | 0 | Shapiro-Wilk normality test |
We apply a square root transformation and a natural log transformation to the outcome and compare the resulting distributions.
# Histograms and normal QQ plots of the outcome under two candidate
# transformations (square root and natural log). The four plot objects are
# only built here; they are arranged into one figure further down.

# Square root transformation: histogram
sqrt_transformation <- hate_crime %>%
  ggplot(aes(x = sqrt(hate_crimes_per_100k_splc))) +
  # bins = 30 matches what geom_histogram() uses when no binning is specified
  geom_histogram(bins = 30, color = "red", fill = "black") +
  labs(
    title = "Distribution of sqrt(Hate Crime Rates) in 50 US States",
    x = "sqrt(Hate Crime Rate per 100,000 Population)",
    y = "Frequency of Distribution",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# Square root transformation: QQ plot.
# stat_qq() puts theoretical quantiles on x and sample quantiles on y, so the
# axes are labelled accordingly rather than with the histogram's labels.
sqrt_qqplot <- ggplot(
  hate_crime,
  aes(sample = sqrt(hate_crimes_per_100k_splc))
) +
  stat_qq() +
  stat_qq_line() +
  labs(
    title = "QQ Plot of sqrt(Hate Crime Rates) in 50 US States",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles: sqrt(Hate Crime Rate)",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# Natural log transformation: histogram
ln_transformation <- hate_crime %>%
  ggplot(aes(x = log(hate_crimes_per_100k_splc))) +
  geom_histogram(bins = 30, color = "red", fill = "black") +
  labs(
    title = "Distribution of ln(Hate Crime Rates) in 50 US States",
    x = "ln(Hate Crime Rate per 100,000 Population)",
    y = "Frequency of Distribution",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )

# Natural log transformation: QQ plot (same axis convention as above)
ln_qqplot <- ggplot(
  hate_crime,
  aes(sample = log(hate_crimes_per_100k_splc))
) +
  stat_qq() +
  stat_qq_line() +
  labs(
    title = "QQ Plot of ln(Hate Crime Rates) in 50 US States",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles: ln(Hate Crime Rate)",
    caption = "Distribution of Hate Crime Rates ( 50 US States)"
  )
After visual inspection, we observe that our natural log transformation may be a good candidate to re-test our normality assumptions.
# Combine the four panels into one figure: the two histograms on the top row
# and the two QQ plots on the bottom row
(sqrt_transformation + ln_transformation) / ( sqrt_qqplot + ln_qqplot)
From the results of our test, we fail to reject the null hypothesis of normality at the 5% significance level (p = 0.745 > 0.05). There is therefore no evidence that the natural-log-transformed outcome deviates from normality, so we proceed under the normality assumption henceforth.
# Shapiro-Wilk normality test on the natural log of the outcome, tidied and
# rendered as a captioned simple table
knitr::kable(
  broom::tidy(shapiro.test(log(hate_crimes_per_100k_splc))),
  "simple",
  caption = "Shapiro Wilk Test"
)
| statistic | p.value | method |
|---|---|---|
| 0.9830847 | 0.7452961 | Shapiro-Wilk normality test |
Box-Cox transformation was utilized to find out the recommended transformation. The optimal value of lambda is near 0, indicating that a natural logarithm transformation of the outcome is best for further analysis.
# Box-Cox diagnostic: regress the untransformed outcome on every column except
# the state identifier, then plot the Box-Cox profile over lambda.
removed_states <- hate_crime %>%
  dplyr::select(-state)
fit <- lm(hate_crimes_per_100k_splc ~ ., data = removed_states)
MASS::boxcox(fit)

# Store the natural log of the outcome as its own column. This is done after
# the Box-Cox fit above so that the `~ .` formula there does not pick it up.
hate_crime <- hate_crime %>%
  mutate(ln_hate_crimes_per_100k_splc = log(hate_crimes_per_100k_splc))

# Horizontal bar chart of the raw hate crime rate, one bar per state.
# The y axis carries the state, so it is labelled "State".
hate_crime %>%
  ggplot(aes(x = hate_crimes_per_100k_splc, y = state)) +
  geom_col(color = "blue") +
  labs(
    title = "Outlier Analysis of 50 US States",
    x = "Hate Crime Rate per 100,000 Population",
    y = "State",
    caption = "Distribution of Hate Crime Rates (50 US States)"
  )
Upon plotting a column graph of the hate crime rate for each state, we can see that Wyoming, South Dakota, and North Dakota had no values, while the District of Columbia, Washington, Oregon, Minnesota, Massachusetts and Maine showed relatively large columns.
After plotting a scatter plot of the same values, it was evident that these states were outliers that influenced the data set.
# Dot plot of the raw hate crime rate by state, one coloured point per state.
# State names are rotated to stay legible and the per-state colour legend is
# hidden.
hate_crime %>%
  ggplot(aes(x = state, y = hate_crimes_per_100k_splc)) +
  geom_point(aes(color = state)) +
  # NOTE(review): x is a discrete state label, so this per-group linear smooth
  # has a single observation per group and probably draws no line. Confirm
  # whether the layer is still wanted.
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  theme(
    axis.text.x = element_text(angle = 90),
    legend.position = "none"
  ) +
  labs(
    title = "Outlier Analysis of 50 US States",
    x = "State",
    y = "Hate Crime Rate per 100,000 Population",
    caption = "Distribution of Hate Crime Rates (50 US States)"
  )
We check whether the association between income inequality and the hate crime rate holds in our data (also considering median household income), and we explore the associations of all the other covariates mentioned above with the outcome in order to assess each predictor’s significance.
# Correlation matrix of the numeric columns (state identifier and the two
# factor covariates excluded), printed as a table rounded to two decimals
knitr::kable(
  cor(dplyr::select(hate_crime, -state, -unemployment, -urbanization)),
  digits = 2
)
| median_household_income | perc_population_with_high_school_degree | perc_non_citizen | gini_index | perc_non_white | hate_crimes_per_100k_splc | ln_hate_crimes_per_100k_splc | |
|---|---|---|---|---|---|---|---|
| median_household_income | 1.00 | 0.65 | 0.30 | -0.13 | 0.04 | 0.34 | 0.31 |
| perc_population_with_high_school_degree | 0.65 | 1.00 | -0.26 | -0.54 | -0.50 | 0.26 | 0.30 |
| perc_non_citizen | 0.30 | -0.26 | 1.00 | 0.48 | 0.75 | 0.24 | 0.14 |
| gini_index | -0.13 | -0.54 | 0.48 | 1.00 | 0.55 | 0.38 | 0.22 |
| perc_non_white | 0.04 | -0.50 | 0.75 | 0.55 | 1.00 | 0.11 | -0.01 |
| hate_crimes_per_100k_splc | 0.34 | 0.26 | 0.24 | 0.38 | 0.11 | 1.00 | 0.89 |
| ln_hate_crimes_per_100k_splc | 0.31 | 0.30 | 0.14 | 0.22 | -0.01 | 0.89 | 1.00 |
# Correlation plot of the numeric columns: upper triangle only, circles sized
# by correlation, diagonal omitted. Factor columns are removed first.
corrplot::corrplot(
  cor(dplyr::select(hate_crime, -state, -unemployment, -urbanization)),
  method = "circle",
  type = "upper",
  diag = FALSE
)

# Scatter plot of the raw hate crime rate against one numeric predictor, with
# a fitted regression line, its confidence band and the Pearson correlation.
#   predictor       - column name of the predictor, as a string
#   predictor_label - x-axis label for that predictor
scatter_vs_hate_crime_rate <- function(predictor, predictor_label) {
  ggscatter(
    hate_crime,
    x = predictor,
    y = "hate_crimes_per_100k_splc",
    add = "reg.line",
    conf.int = TRUE,
    cor.coef = TRUE,
    cor.method = "pearson",
    xlab = predictor_label,
    ylab = "Hate Crime Rate (per 100k pop.)"
  )
}

# One panel per numeric predictor. The single-letter names are kept because
# the layout step later in the report refers to them.
a <- scatter_vs_hate_crime_rate(
  "median_household_income",
  "Median Household Income"
)
b <- scatter_vs_hate_crime_rate(
  "perc_population_with_high_school_degree",
  "% of People 25+ with High School Degree"
)
c <- scatter_vs_hate_crime_rate(
  "perc_non_citizen",
  "% of People Non-US Citizens"
)
d <- scatter_vs_hate_crime_rate(
  "gini_index",
  "Income Inequality Index (0-100)"
)
e <- scatter_vs_hate_crime_rate(
  "perc_non_white",
  "% of People Non-White"
)
From our results, we observe that the predictors “gini_index” and “median_household_income” have the highest correlations with our outcome of interest.
# Lay out the five predictor scatter plots: three on the top row, two below
(a + b + c) / (d + e)

# Scatterplot matrix of every pair of numeric columns (state identifier and
# the two factor covariates excluded)
pairs(dplyr::select(hate_crime, -state, -unemployment, -urbanization))
# Multiple linear regression of the log outcome on all seven covariates; the
# state identifier is not part of the formula
mult_fit <- lm(
  ln_hate_crimes_per_100k_splc ~
    unemployment + urbanization + median_household_income +
    perc_population_with_high_school_degree + perc_non_citizen +
    gini_index + perc_non_white,
  data = hate_crime
)

# Variance inflation factor of each model term, shown as a simple table
knitr::kable(vif(mult_fit), "simple")
| x | |
|---|---|
| unemploymentlow | 1.426492 |
| urbanizationhigh | 1.983246 |
| median_household_income | 3.108161 |
| perc_population_with_high_school_degree | 3.895361 |
| perc_non_citizen | 3.728286 |
| gini_index | 1.845436 |
| perc_non_white | 3.236419 |
All the predictors have a VIF below 5. This suggests that it would not be problematic to include them in the construction of the model. However, the correlation analysis shows that variables perc_non_white and perc_non_citizen have a moderate linear relationship with a correlation coefficient of 0.75.
# Scatter plot - ln(hate crime rate) vs. gini index by unemployment, with a
# separate least-squares line (no confidence band) for each unemployment level.
# NOTE(review): hate_crimedft is not created in the code visible here; it is
# presumably a transformed copy of the data built elsewhere - confirm.
ggplot(
  hate_crimedft,
  aes(
    x = gini_index,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(unemployment)
  )
) +
  geom_point(size = 2) +
  geom_smooth(
    aes(group = factor(unemployment), color = factor(unemployment)),
    method = "lm",
    se = FALSE
  ) +
  scale_color_manual(
    name = "Unemployment",
    labels = c("Low", "High"),
    values = c("blue", "red")
  ) +
  labs(
    title = "Scatterplot of ln(hate crime per 100k people) vs ln(income equality) by Unemployment Status",
    x = "ln(gini index)",
    y = "ln(hate crime per 100k people)"
  )

# Interaction model: does the gini index slope differ by unemployment level?
reg1t <- lm(
  ln_hate_crimes_per_100k_splc ~ gini_index * factor(unemployment),
  data = hate_crimedft
)
summary(reg1t)
##
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ gini_index * factor(unemployment),
## data = hate_crimedft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.08066 -0.39519 -0.00407 0.30086 1.51569
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.0684 2.6960 0.767 0.447
## gini_index 2.9069 2.3159 1.255 0.217
## factor(unemployment)1 0.5293 3.6641 0.144 0.886
## gini_index:factor(unemployment)1 0.8122 3.2178 0.252 0.802
##
## Residual standard error: 0.6305 on 41 degrees of freedom
## Multiple R-squared: 0.1214, Adjusted R-squared: 0.05711
## F-statistic: 1.888 on 3 and 41 DF, p-value: 0.1466
The interaction is not significant at the 5% significance level: we find no evidence that the relationship between hate crime per 100k people and income inequality varies by unemployment status.
# Scatter plot - ln(hate crime rate) vs. gini index by urbanization, with a
# separate least-squares line (no confidence band) for each urbanization level.
# NOTE(review): hate_crimedft is not created in the code visible here; it is
# presumably a transformed copy of the data built elsewhere - confirm.
ggplot(
  hate_crimedft,
  aes(
    x = gini_index,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(urbanization)
  )
) +
  geom_point(size = 2) +
  geom_smooth(
    aes(group = factor(urbanization), color = factor(urbanization)),
    method = "lm",
    se = FALSE
  ) +
  scale_color_manual(
    name = "Urbanization",
    labels = c("Low", "High"),
    values = c("blue", "red")
  ) +
  labs(
    title = "Scatterplot of ln(hate crime per 100k people) vs ln(income equality) by Urbanization Status",
    x = "ln(gini index)",
    y = "ln(hate crime per 100k people)"
  )

# Interaction model: does the gini index slope differ by urbanization level?
reg2t <- lm(
  ln_hate_crimes_per_100k_splc ~ gini_index * factor(urbanization),
  data = hate_crimedft
)
summary(reg2t)
##
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ gini_index * factor(urbanization),
## data = hate_crimedft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.23999 -0.42661 -0.03661 0.42869 1.25787
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.019 3.464 -0.872 0.388
## gini_index -1.263 2.970 -0.425 0.673
## factor(urbanization)1 4.834 4.113 1.175 0.247
## gini_index:factor(urbanization)1 4.081 3.579 1.140 0.261
##
## Residual standard error: 0.6443 on 41 degrees of freedom
## Multiple R-squared: 0.08241, Adjusted R-squared: 0.01527
## F-statistic: 1.227 on 3 and 41 DF, p-value: 0.312
The interaction is not significant at the 5% significance level: we find no evidence that the relationship between hate crime per 100k people and income inequality varies by urbanization status.
# Scatter plot - ln(hate crime rate) vs. education level by unemployment, with
# a separate least-squares line (no confidence band) per unemployment level.
# NOTE(review): hate_crimedft is not created in the code visible here; it is
# presumably a transformed copy of the data built elsewhere - confirm.
ggplot(
  hate_crimedft,
  aes(
    x = perc_population_with_high_school_degree,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(unemployment)
  )
) +
  geom_point(size = 2) +
  geom_smooth(
    aes(group = factor(unemployment), color = factor(unemployment)),
    method = "lm",
    se = FALSE
  ) +
  scale_color_manual(
    name = "Unemployment",
    labels = c("Low", "High"),
    values = c("blue", "red")
  ) +
  labs(
    title = "Scatterplot of ln(hate crime per 100k people) vs ln(education level) by Unemployment status",
    x = "ln(percentage of population with high school degree and higher)",
    y = "ln(hate crime per 100k people)"
  )

# Interaction model: does the education slope differ by unemployment level?
reg11t <- lm(
  ln_hate_crimes_per_100k_splc ~
    perc_population_with_high_school_degree * factor(unemployment),
  data = hate_crimedft
)
summary(reg11t)
##
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ perc_population_with_high_school_degree *
## factor(unemployment), data = hate_crimedft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2361 -0.3816 0.1036 0.3528 1.7602
##
## Coefficients:
## Estimate
## (Intercept) -1.1389
## perc_population_with_high_school_degree 0.9443
## factor(unemployment)1 0.9631
## perc_population_with_high_school_degree:factor(unemployment)1 4.8987
## Std. Error
## (Intercept) 0.5088
## perc_population_with_high_school_degree 2.6854
## factor(unemployment)1 0.8374
## perc_population_with_high_school_degree:factor(unemployment)1 3.8931
## t value Pr(>|t|)
## (Intercept) -2.238 0.0307
## perc_population_with_high_school_degree 0.352 0.7269
## factor(unemployment)1 1.150 0.2567
## perc_population_with_high_school_degree:factor(unemployment)1 1.258 0.2154
##
## (Intercept) *
## perc_population_with_high_school_degree
## factor(unemployment)1
## perc_population_with_high_school_degree:factor(unemployment)1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.63 on 41 degrees of freedom
## Multiple R-squared: 0.1228, Adjusted R-squared: 0.05865
## F-statistic: 1.914 on 3 and 41 DF, p-value: 0.1424
The interaction is not significant at the 5% significance level: we find no evidence that the relationship between hate crime per 100k people and education level varies by unemployment status.
# Scatter plot - ln(hate crime rate) vs. education level by urbanization, with
# a separate least-squares line (no confidence band) per urbanization level.
# NOTE(review): hate_crimedft is not created in the code visible here; it is
# presumably a transformed copy of the data built elsewhere - confirm.
ggplot(
  hate_crimedft,
  aes(
    x = perc_population_with_high_school_degree,
    y = ln_hate_crimes_per_100k_splc,
    colour = factor(urbanization)
  )
) +
  geom_point(size = 2) +
  geom_smooth(
    aes(group = factor(urbanization), color = factor(urbanization)),
    method = "lm",
    se = FALSE
  ) +
  scale_color_manual(
    name = "Urbanization",
    labels = c("Low", "High"),
    values = c("blue", "red")
  ) +
  labs(
    title = "Scatterplot of ln(hate crime per 100k people) vs ln(education level) by Urbanization status",
    x = "ln(percentage of population with high school degree or higher)",
    y = "ln(hate crime per 100k people)"
  )

# Interaction model: does the education slope differ by urbanization level?
reg22t <- lm(
  ln_hate_crimes_per_100k_splc ~
    perc_population_with_high_school_degree * factor(urbanization),
  data = hate_crimedft
)
summary(reg22t)
##
## Call:
## lm(formula = ln_hate_crimes_per_100k_splc ~ perc_population_with_high_school_degree *
## factor(urbanization), data = hate_crimedft)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.37000 -0.40173 0.02948 0.45744 1.62894
##
## Coefficients:
## Estimate
## (Intercept) -1.1362
## perc_population_with_high_school_degree 2.0724
## factor(urbanization)1 1.1960
## perc_population_with_high_school_degree:factor(urbanization)1 4.2938
## Std. Error
## (Intercept) 0.4708
## perc_population_with_high_school_degree 2.2729
## factor(urbanization)1 0.7401
## perc_population_with_high_school_degree:factor(urbanization)1 3.4439
## t value Pr(>|t|)
## (Intercept) -2.413 0.0204
## perc_population_with_high_school_degree 0.912 0.3672
## factor(urbanization)1 1.616 0.1138
## perc_population_with_high_school_degree:factor(urbanization)1 1.247 0.2196
##
## (Intercept) *
## perc_population_with_high_school_degree
## factor(urbanization)1
## perc_population_with_high_school_degree:factor(urbanization)1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6118 on 41 degrees of freedom
## Multiple R-squared: 0.1727, Adjusted R-squared: 0.1122
## F-statistic: 2.853 on 3 and 41 DF, p-value: 0.04888
The interaction is not significant at the 5% significance level: we find no evidence that the relationship between hate crime per 100k people and education level varies by urbanization status.
Fit model with all predictors
# Modelling data without the District of Columbia.
# DC is dropped by name rather than by row position, so the exclusion no
# longer depends on where DC happens to sit after rows with missing values
# were removed.
# NOTE(review): assumes the state label is exactly "District of Columbia" -
# confirm against the data.
# The stored log outcome column is dropped as well. With a `~ .` formula it
# would otherwise enter the model as a predictor of
# log(hate_crimes_per_100k_splc), i.e. the response regressed on itself: a
# perfect fit with a coefficient of 1 and a meaningless stepwise search.
hate_crime_no_dc <- hate_crime %>%
  dplyr::filter(state != "District of Columbia") %>%
  dplyr::select(-state, -dplyr::any_of("ln_hate_crimes_per_100k_splc"))

# Full model: log outcome on every remaining covariate
mult.fit <- lm(log(hate_crimes_per_100k_splc) ~ ., data = hate_crime_no_dc)

# Stepwise selection in both directions, starting from the full model
step(mult.fit, direction = "both")
## Start: AIC=-3232.37
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization +
## median_household_income + perc_population_with_high_school_degree +
## perc_non_citizen + gini_index + perc_non_white + ln_hate_crimes_per_100k_splc
##
## Df Sum of Sq RSS AIC
## - perc_non_white 1 0.000 0.000 -3365.4
## <none> 0.000 -3232.4
## - perc_non_citizen 1 0.000 0.000 -3232.2
## - perc_population_with_high_school_degree 1 0.000 0.000 -3222.0
## - urbanization 1 0.000 0.000 -3220.0
## - unemployment 1 0.000 0.000 -3209.8
## - median_household_income 1 0.000 0.000 -3193.4
## - gini_index 1 0.000 0.000 -3191.0
## - ln_hate_crimes_per_100k_splc 1 11.909 11.909 -41.5
##
## Step: AIC=-3365.39
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization +
## median_household_income + perc_population_with_high_school_degree +
## perc_non_citizen + gini_index + ln_hate_crimes_per_100k_splc
##
## Df Sum of Sq RSS AIC
## - median_household_income 1 0.000 0.000 -3426.7
## <none> 0.000 -3365.4
## + perc_non_white 1 0.000 0.000 -3363.4
## - urbanization 1 0.000 0.000 -3320.4
## - perc_non_citizen 1 0.000 0.000 -3300.7
## - unemployment 1 0.000 0.000 -3268.2
## - perc_population_with_high_school_degree 1 0.000 0.000 -3215.4
## - gini_index 1 0.000 0.000 -3183.2
## - ln_hate_crimes_per_100k_splc 1 11.983 11.983 -43.2
##
## Step: AIC=-3426.7
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization +
## perc_population_with_high_school_degree + perc_non_citizen +
## gini_index + ln_hate_crimes_per_100k_splc
##
## Df Sum of Sq RSS AIC
## + perc_non_white 1 0.000 0.000 -3428.5
## <none> 0.000 -3426.7
## + median_household_income 1 0.000 0.000 -3425.3
## - gini_index 1 0.000 0.000 -3309.9
## - perc_population_with_high_school_degree 1 0.000 0.000 -3271.3
## - urbanization 1 0.000 0.000 -3188.3
## - perc_non_citizen 1 0.000 0.000 -3169.9
## - unemployment 1 0.000 0.000 -3125.0
## - ln_hate_crimes_per_100k_splc 1 12.048 12.048 -45.0
##
## Step: AIC=-3428.51
## log(hate_crimes_per_100k_splc) ~ unemployment + urbanization +
## perc_population_with_high_school_degree + perc_non_citizen +
## gini_index + ln_hate_crimes_per_100k_splc + perc_non_white
##
## Df Sum of Sq RSS AIC
## <none> 0.000 -3428.5
## + median_household_income 1 0.000 0.000 -3427.6
## - perc_non_white 1 0.000 0.000 -3426.7
## - gini_index 1 0.000 0.000 -3309.1
## - perc_population_with_high_school_degree 1 0.000 0.000 -3271.6
## - urbanization 1 0.000 0.000 -3186.3
## - perc_non_citizen 1 0.000 0.000 -3168.2
## - unemployment 1 0.000 0.000 -3123.6
## - ln_hate_crimes_per_100k_splc 1 11.957 11.957 -43.3
##
## Call:
## lm(formula = log(hate_crimes_per_100k_splc) ~ unemployment +
## urbanization + perc_population_with_high_school_degree +
## perc_non_citizen + gini_index + ln_hate_crimes_per_100k_splc +
## perc_non_white, data = hate_crime_no_dc)
##
## Coefficients:
## (Intercept)
## -1.713e-17
## unemploymentlow
## -1.126e-19
## urbanizationhigh
## -3.782e-18
## perc_population_with_high_school_degree
## 1.608e-17
## perc_non_citizen
## 9.671e-17
## gini_index
## 0.000e+00
## ln_hate_crimes_per_100k_splc
## 1.000e+00
## perc_non_white
## -3.726e-17
Based on the results of the stepwise procedure, we choose a model with 2 predictors: percent of adults 25 and older with a high school degree, and the Gini index.
# Reduced model: log outcome on education level and the gini index only,
# fitted on the data without DC
stepwise_log_fit <- lm(
  log(hate_crimes_per_100k_splc) ~
    perc_population_with_high_school_degree + gini_index,
  data = hate_crime_no_dc
)

# Check model assumptions: draw the lm diagnostic plots on a 2 x 2 grid
par(mfrow = c(2, 2))
plot(stepwise_log_fit)
# 5-fold cross-validation of the reduced model with caret.
# The seed is fixed before training so the fold assignment is reproducible.
set.seed(1)

# Resampling scheme: cross-validation with 5 folds
data_train <- trainControl(method = "cv", number = 5)

# Refit the two-predictor linear model under that scheme and print the
# resampled performance summary
model_caret <- train(
  log(hate_crimes_per_100k_splc) ~
    perc_population_with_high_school_degree + gini_index,
  data = hate_crime_no_dc,
  method = "lm",
  trControl = data_train,
  na.action = na.pass
)
model_caret
## Linear Regression
##
## 44 samples
## 2 predictor
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 34, 36, 35, 35, 36
## Resampling results:
##
## RMSE Rsquared MAE
## 0.5554347 0.153031 0.474133
##
## Tuning parameter 'intercept' was held constant at a value of TRUE